rm(list = ls())
library("tidyverse")
library("pastecs")
library("haven")
library("caret")
library("funModeling") 
library("stringr")
library("gridExtra")
library("lme4")

##Define here the working directory
setwd("~")

parenth <- function(x) {
  regmatches(x, gregexpr("(?<=\\().*?(?=\\))", x, perl=T))
}

# Specify the width of your confidence intervals
interval1 <- -qnorm((1-0.9)/2)  # 90% multiplier
interval2 <- -qnorm((1-0.95)/2)  # 95% multiplier

# Edu estimation (Kling, no covariates adjustment)  
schoolsK<-read_dta("edu_analysis1.dta", encoding = 'latin1')
schoolsK<-as_tibble(schoolsK)

schoolsK$Se1stderr <-parenth(schoolsK$Se1stderr)
schoolsK$Le1stderr <-parenth(schoolsK$Le1stderr)

schoolNCK <- schoolsK %>% select_("var", "Se1coef", "Se1stderr", "Le1coef","Le1stderr") %>% 
  filter(grepl('index', var)) %>% filter(!grepl('Performance', var)) %>%
  mutate( CovAdjust = "No",Index= "UW", Model="ANCOVA-NC")

# Edu estimation (Kling, with covariates adjustment)  
schools2K<-read_dta("edu_analysis2.dta", encoding = 'latin1')
schools2K<-as_tibble(schools2K)

schools2K$Se1stderr <-parenth(schools2K$Se1stderr)
schools2K$Le1stderr <-parenth(schools2K$Le1stderr)

schoolWCK <- schools2K %>% select_("var", "Se1coef", "Se1stderr", "Le1coef","Le1stderr") %>% 
  filter(grepl('index', var)) %>% filter(!grepl('Performance', var)) %>%
  mutate( CovAdjust = "Yes",Index= "UW", Model="ANCOVA-CA")

# Edu estimation (Anderson, no covariates adjustment)  
schoolsA<-read_dta("edu_Anderson1.dta", encoding = 'latin1')
schoolsA<-as_tibble(schoolsA)

schoolsA$Se1stderr <-parenth(schoolsA$Se1stderr)
schoolsA$Le1stderr <-parenth(schoolsA$Le1stderr)

schoolNCA <- schoolsA %>% select_("var", "Se1coef", "Se1stderr", "Le1coef","Le1stderr") %>% 
  filter(grepl('index', var)) %>% filter(!grepl('Performance', var)) %>%
  mutate( CovAdjust = "No",Index= "W")

# Edu estimation (Anderson, with covariates adjustment)  
schools2A<-read_dta("edu_Anderson2.dta", encoding = 'latin1')
schools2A<-as_tibble(schools2A)

schools2A$Se1stderr <-parenth(schools2A$Se1stderr)
schools2A$Le1stderr <-parenth(schools2A$Le1stderr)

schoolWCA <- schools2A %>% select_("var", "Se1coef", "Se1stderr", "Le1coef","Le1stderr") %>% 
  filter(grepl('index', var)) %>% filter(!grepl('Performance', var)) %>%
  mutate( CovAdjust = "Yes",Index= "W")

# bring-in multilevel modeling
schoolsML<-read_dta("edu_multilevel.dta", encoding = 'latin1')

schoolsMLS<-schoolsML %>% filter(year==1) %>% select(var, estimate, stderr, CovAdjust) %>%
              rename(Se1coef=estimate, Se1stderr=stderr) %>% 
              mutate(Index="UW", Le1coef="NA", Le1stderr="NA", 
              Model= case_when(CovAdjust=="No"~"MLV-NC", CovAdjust=="Yes"~"MLV-CA"))

schoolsMLL<-schoolsML %>% filter(year==2) %>% select(var, estimate, stderr, CovAdjust) %>%
  rename(Le1coef=estimate, Le1stderr=stderr) %>% 
  mutate(Index="UW", Se1coef="NA", Se1stderr="NA", 
         Model= case_when(CovAdjust=="No"~"MLV-NC", CovAdjust=="Yes"~"MLV-CA"))

SchoolCombineA<-rbind(schoolWCA, schoolNCA)
SchoolCombineKS<-rbind(schoolWCK, schoolNCK, schoolsMLS)
SchoolCombineKL<-rbind(schoolWCK, schoolNCK, schoolsMLL)

SchoolCombineA <- transform(SchoolCombineA, 
                  Se1coef = as.numeric(Se1coef), 
                  Se1stderr = as.numeric(Se1stderr),
                  Le1coef = as.numeric(Le1coef),
                  Le1stderr = as.numeric(Le1stderr))

SchoolCombineKS <- transform(SchoolCombineKS, 
                  Se1coef = as.numeric(Se1coef), 
                  Se1stderr = as.numeric(Se1stderr),
                  Le1coef = as.numeric(Le1coef),
                  Le1stderr = as.numeric(Le1stderr))

SchoolCombineKL <- transform(SchoolCombineKL, 
                             Se1coef = as.numeric(Se1coef), 
                             Se1stderr = as.numeric(Se1stderr),
                             Le1coef = as.numeric(Le1coef),
                             Le1stderr = as.numeric(Le1stderr))

# Kling EDU
EduShortK<-ggplot(SchoolCombineKS, aes(colour = Model)) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = var, ymin = Se1coef - Se1stderr*interval1, ymax = Se1coef + Se1stderr*interval1),
                 lwd = 2, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = var, y = Se1coef, ymin = Se1coef - Se1stderr*interval2,
                      ymax = Se1coef + Se1stderr*interval2), size=1, lwd = 1/3, position = position_dodge(width = 1/2),
                  shape = 21, fill = "BLACK") + coord_flip() + theme_bw() + ggtitle("Edu (Year-1)") + 
  scale_x_discrete("", limits = rev(c("Monitoring index", "Effort index", "Input index"))) +
  scale_y_continuous(name="",  breaks = round(seq(min(-.8), max(.8), by = .2),1), limits = c(-1, 1)) +
  theme(text = element_text(size=14, face = "bold", vjust = 1.5), legend.position=c(.2,.8))

EduLongK<-ggplot(SchoolCombineKL, aes(colour = Model)) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = var, ymin = Le1coef - Le1stderr*interval1, ymax = Le1coef + Le1stderr*interval1),
                 lwd = 2, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = var, y = Le1coef, ymin = Le1coef - Le1stderr*interval2,
                      ymax = Le1coef + Le1stderr*interval2), size=1, lwd = 1/3, position = position_dodge(width = 1/2),
                  shape = 21, fill = "BLACK") + coord_flip() + theme_bw() + ggtitle("Edu (Year-2)") + 
  scale_x_discrete("", limits = rev(c("Monitoring index", "Effort index", "Input index"))) +
  scale_y_continuous(name="",  breaks = round(seq(min(-.8), max(.8), by = .2),1), limits = c(-1, 1)) +
  theme(text = element_text(size=14, face = "bold", vjust = 1.5), legend.position=c(.2,.8))

# Anderson EDU
EduShortA<-ggplot(SchoolCombineA, aes(colour = CovAdjust)) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = var, ymin = Se1coef - Se1stderr*interval1, ymax = Se1coef + Se1stderr*interval1),
                 lwd = 2, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = var, y = Se1coef, ymin = Se1coef - Se1stderr*interval2,
                      ymax = Se1coef + Se1stderr*interval2), size=1, lwd = 1/3, position = position_dodge(width = 1/2),
                  shape = 21, fill = "BLACK") + coord_flip() + theme_bw() + ggtitle("Edu (Year-1)") + 
  scale_x_discrete("", limits = rev(c("Monitoring index", "Effort index", "Input index"))) +
  scale_y_continuous(name="",  breaks = round(seq(min(-.75), max(.75), by = .25),2), limits = c(-1, 1)) +
  theme(text = element_text(size=14, face = "bold", vjust = 1.5), legend.position=c(.8,.9))

EduLongA<-ggplot(SchoolCombineA, aes(colour = CovAdjust)) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = var, ymin = Le1coef - Le1stderr*interval1, ymax = Le1coef + Le1stderr*interval1),
                 lwd = 2, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = var, y = Le1coef, ymin = Le1coef - Le1stderr*interval2,
                      ymax = Le1coef + Le1stderr*interval2), size=1, lwd = 1/3, position = position_dodge(width = 1/2),
                  shape = 21, fill = "BLACK") + coord_flip() + theme_bw() + ggtitle("Edu (Year-2)") + 
  scale_x_discrete("", limits = rev(c("Monitoring index", "Effort index", "Input index"))) +
  scale_y_continuous(name="",  breaks = round(seq(min(-.75), max(.75), by = .25),2), limits = c(-1, 1)) +
  theme(text = element_text(size=14, face = "bold", vjust = 1.5), legend.position=c(.8,.9))

# without covariates
EduShort<-ggplot(SchoolCombineKS, aes(colour = Index)) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = var, ymin = Se1coef - Se1stderr*interval1, ymax = Se1coef + Se1stderr*interval1),
 lwd = 2, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = var, y = Se1coef, ymin = Se1coef - Se1stderr*interval2,
   ymax = Se1coef + Se1stderr*interval2), size=1, lwd = 1/3, position = position_dodge(width = 1/2),
   shape = 21, fill = "BLACK") + coord_flip() + theme_bw() + ggtitle("Edu (Year-1)") + 
  scale_x_discrete("", limits = rev(c("Monitoring index", "Effort index", "Input index"))) +
  scale_y_continuous(name="",  breaks = round(seq(min(-.75), max(.75), by = .25),2), limits = c(-1, 1)) +
  theme(text = element_text(size=14, face = "bold", vjust = 1.5), legend.position=c(.8,.9))

EduLong<-ggplot(SchoolCombineKL, aes(colour = Index)) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = var, ymin = Le1coef - Le1stderr*interval1, ymax = Le1coef + Le1stderr*interval1),
      lwd = 2, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = var, y = Le1coef, ymin = Le1coef - Le1stderr*interval2,
      ymax = Le1coef + Le1stderr*interval2), size=1, lwd = 1/3, position = position_dodge(width = 1/2),
      shape = 21, fill = "BLACK") + coord_flip() + theme_bw() + ggtitle("Edu (Year-2)") + 
  scale_x_discrete("", limits = rev(c("Monitoring index", "Effort index", "Input index"))) +
  scale_y_continuous(name="",  breaks = round(seq(min(-.75), max(.75), by = .25),2), limits = c(-1, 1)) +
  theme(text = element_text(size=14, face = "bold", vjust = 1.5), legend.position=c(.8,.9))


library(grid)


aux1 <- grid.arrange(EduShortK,EduLongK, ncol=2, top=textGrob("Education outcomes (unweighted indices)", gp=gpar(fontsize=30,font=8)))

aux2 <- grid.arrange(EduShortA,EduLongA, ncol=2, top=textGrob("Education outcomes (weighted indices)", gp=gpar(fontsize=30,font=8)))


###################################################
# Health
###################################################
# Health estimation (Kling, no covariates adjustment)  
ClinicsK<-read_dta("Health_analysis1.dta", encoding = 'latin1')

ClinicsK$Se1stderr <-parenth(ClinicsK$Se1stderr)
ClinicsK$Le1stderr <-parenth(ClinicsK$Le1stderr)

ClinicNCK <- ClinicsK %>% select_("var", "Se1coef", "Se1stderr", "Le1coef","Le1stderr") %>% 
  filter(grepl('index', var)) %>%
  mutate( CovAdjust = "No",Index= "UW", Model="ANCOVA-NC")

# Health estimation (Kling, with covariates adjustment)  
Clinics2K<-read_dta("Health_analysis2.dta", encoding = 'latin1')

Clinics2K$Se1stderr <-parenth(Clinics2K$Se1stderr)
Clinics2K$Le1stderr <-parenth(Clinics2K$Le1stderr)

ClinicWCK <- Clinics2K %>% select_("var", "Se1coef", "Se1stderr", "Le1coef","Le1stderr") %>% 
  filter(grepl('index', var)) %>%
  mutate( CovAdjust = "Yes",Index= "UW", Model="ANCOVA-CA")

# Health estimation (Anderson, no covariates adjustment)  
ClinicsA<-read_dta("health_Anderson1.dta", encoding = 'latin1')
ClinicsA<-as_tibble(ClinicsA)

ClinicsA$Se1stderr <-parenth(ClinicsA$Se1stderr)
ClinicsA$Le1stderr <-parenth(ClinicsA$Le1stderr)

ClinicNCA <- ClinicsA %>% select_("var", "Se1coef", "Se1stderr", "Le1coef","Le1stderr") %>% 
  filter(grepl('index', var)) %>%
  mutate( CovAdjust = "No",Index= "W")

# Health estimation (Anderson, with covariates adjustment)  
ClinicsA2<-read_dta("health_Anderson2.dta", encoding = 'latin1')
ClinicsA2<-as_tibble(ClinicsA2)

ClinicsA2$Se1stderr <-parenth(ClinicsA2$Se1stderr)
ClinicsA2$Le1stderr <-parenth(ClinicsA2$Le1stderr)

ClinicWCA <- ClinicsA2 %>% select_("var", "Se1coef", "Se1stderr", "Le1coef","Le1stderr") %>% 
  filter(grepl('index', var)) %>%
  mutate( CovAdjust = "Yes",Index= "W")

# bring-in multilevel modeling
ClinicML<-read_dta("health_multilevel.dta", encoding = 'latin1')

ClinicMLS<-ClinicML %>% filter(year==1) %>% select(var, estimate, stderr, CovAdjust) %>%
  rename(Se1coef=estimate, Se1stderr=stderr) %>% 
  mutate(Index="UW", Le1coef="NA", Le1stderr="NA", 
         Model= case_when(CovAdjust=="No"~"MLV-NC", CovAdjust=="Yes"~"MLV-CA"))

ClinicMLL<-ClinicML %>% filter(year==2) %>% select(var, estimate, stderr, CovAdjust) %>%
  rename(Le1coef=estimate, Le1stderr=stderr) %>% 
  mutate(Index="UW", Se1coef="NA", Se1stderr="NA", 
         Model= case_when(CovAdjust=="No"~"MLV-NC", CovAdjust=="Yes"~"MLV-CA"))

ClinicCombineA<-rbind(ClinicWCA, ClinicNCA)
ClinicCombineKS<-rbind(ClinicWCK, ClinicNCK, ClinicMLS)
ClinicCombineKL<-rbind(ClinicWCK, ClinicNCK, ClinicMLL)

ClinicCombineA <- transform(ClinicCombineA, 
                            Se1coef = as.numeric(Se1coef), 
                            Se1stderr = as.numeric(Se1stderr),
                            Le1coef = as.numeric(Le1coef),
                            Le1stderr = as.numeric(Le1stderr))

ClinicCombineKS <- transform(ClinicCombineKS, 
                            Se1coef = as.numeric(Se1coef), 
                            Se1stderr = as.numeric(Se1stderr),
                            Le1coef = as.numeric(Le1coef),
                            Le1stderr = as.numeric(Le1stderr))

ClinicCombineKL <- transform(ClinicCombineKL, 
                             Se1coef = as.numeric(Se1coef), 
                             Se1stderr = as.numeric(Se1stderr),
                             Le1coef = as.numeric(Le1coef),
                             Le1stderr = as.numeric(Le1stderr))

# Kling (unweighted)
HealthShortK<-ggplot(ClinicCombineKS, aes(colour = Model)) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = var, ymin = Se1coef - Se1stderr*interval1, ymax = Se1coef + Se1stderr*interval1),
                 lwd = 2, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = var, y = Se1coef, ymin = Se1coef - Se1stderr*interval2,
                      ymax = Se1coef + Se1stderr*interval2), size=1, lwd = 1/3, position = position_dodge(width = 1/2),
                  shape = 21, fill = "BLACK") + coord_flip() + theme_bw() + ggtitle("Health (Year-1)") + 
  scale_x_discrete("", limits = rev(c("Monitoring index", "Effort index", "Input index", "Utilization index"))) +
  scale_y_continuous(name="",  breaks = round(seq(min(-.8), max(.8), by = .2),1), limits = c(-1, 1)) +
  theme(text = element_text(size=14, face = "bold", vjust = 1.5), legend.position=c(.8,.8))

HealthLongK<-ggplot(ClinicCombineKL, aes(colour = Model)) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = var, ymin = Le1coef - Le1stderr*interval1, ymax = Le1coef + Le1stderr*interval1),
                 lwd = 2, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = var, y = Le1coef, ymin = Le1coef - Le1stderr*interval2,
                      ymax = Le1coef + Le1stderr*interval2), size=1,lwd = 1/3, position = position_dodge(width = 1/2),
                  shape = 21, fill = "BLACK") + coord_flip() + theme_bw() + ggtitle("Health (Year-2)") + 
  scale_x_discrete("", limits = rev(c("Monitoring index", "Effort index", "Input index", "Utilization index"))) +
  scale_y_continuous(name="",  breaks = round(seq(min(-.8), max(.8), by = .2),1), limits = c(-1, 1)) +
  theme(text = element_text(size=14, face = "bold", vjust = 1.5), legend.position=c(.8,.8))

# Anderson (weighted indices)
HealthShortA<-ggplot(ClinicCombineA, aes(colour = Index)) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = var, ymin = Se1coef - Se1stderr*interval1, ymax = Se1coef + Se1stderr*interval1),
                 lwd = 2, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = var, y = Se1coef, ymin = Se1coef - Se1stderr*interval2,
                      ymax = Se1coef + Se1stderr*interval2), size=1, lwd = 1/3, position = position_dodge(width = 1/2),
                  shape = 21, fill = "BLACK") + coord_flip() + theme_bw() + ggtitle("Health (Year-1)") + 
  scale_x_discrete("", limits = rev(c("Monitoring index", "Effort index", "Input index"))) +
  scale_y_continuous(name="",  breaks = round(seq(min(-.75), max(.75), by = .25),2), limits = c(-1, 1)) +
  theme(text = element_text(size=14, face = "bold", vjust = 1.5), legend.position=c(.8,.9))

HealthLongA<-ggplot(ClinicCombineA, aes(colour = Index)) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = var, ymin = Le1coef - Le1stderr*interval1, ymax = Le1coef + Le1stderr*interval1),
                 lwd = 2, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = var, y = Le1coef, ymin = Le1coef - Le1stderr*interval2,
                      ymax = Le1coef + Le1stderr*interval2), size=1,lwd = 1/3, position = position_dodge(width = 1/2),
                  shape = 21, fill = "BLACK") + coord_flip() + theme_bw() + ggtitle("Health (Year-2)") + 
  scale_x_discrete("", limits = rev(c("Monitoring index", "Effort index", "Input index"))) +
  scale_y_continuous(name="",  breaks = round(seq(min(-.75), max(.75), by = .25),2), limits = c(-1, 1)) +
  theme(text = element_text(size=14, face = "bold", vjust = 1.5), legend.position=c(.8,.9))

#### figures
aux3 <- grid.arrange(HealthShortK,HealthLongK, ncol=2, top=textGrob("Health", gp=gpar(fontsize=18,font=8)))

aux4 <- grid.arrange(EduShortA,EduLongA, HealthShortA,HealthLongA, ncol=2, top=textGrob("Health and education outcomes (weighted indices)", gp=gpar(fontsize=30,font=8)))

###FIGURE 3 in paper###

Figure3 <- grid.arrange(EduShortK,EduLongK, HealthShortK,HealthLongK, ncol=2, top=textGrob("Health and education outcomes (unweighted indices)", gp=gpar(fontsize=30,font=8)))


######################################
# water 
######################################
water1<-read_dta("water_kling.dta", encoding = 'latin1')
water2<-read_dta("water_multilevel.dta", encoding = 'latin1')

waterA<-water1 %>% select(-dof) %>%
  mutate(Model= case_when(CovAdjust=="No"~"ANCOVA-NC", CovAdjust=="Yes"~"ANCOVA-CA"))

waterK<-water2 %>% rename(t="z") %>%
  mutate(Model= case_when(CovAdjust=="No"~"MLV-NC", CovAdjust=="Yes"~"MLV-CA"))

water<-rbind(waterA, waterK) 

### Generate Figure 4 in paper ###

Figure4 <- ggplot(water, aes(colour = Model)) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = var, ymin = estimate - stderr*interval1, ymax = estimate + stderr*interval1),
                 lwd = 2, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = var, y = estimate, ymin = estimate - stderr*interval2,
                      ymax = estimate + stderr*interval2), size=1,lwd = 1/3, position = position_dodge(width = 1/2),
                  shape = 21, fill = "BLACK") + coord_flip() + theme_bw() + ggtitle("Water") + 
  scale_x_discrete("", limits = rev(c("Parts & services", "Village requests"))) +
  scale_y_continuous(name="",  breaks = round(seq(min(-2), max(2), by = .25),2), limits = c(-2.06, 2.06)) +
  theme(text = element_text(size=15, face = "bold", vjust = 1.5), legend.position=c(.2,.82))

Figure4


######################################
# quasi control 
######################################
schoolsKQ<-read_dta("edu_QuasiControl1.dta", encoding = 'latin1')
schoolsKQ<-as_tibble(schoolsKQ)

schoolsKQ$Se1stderr <-parenth(schoolsKQ$Se1stderr)
schoolsKQ$Le1stderr <-parenth(schoolsKQ$Le1stderr)

schoolNCKQ <- schoolsKQ %>% select_("var", "Se1coef", "Se1stderr", "Le1coef","Le1stderr") %>% 
  filter(grepl('index', var)) %>% filter(!grepl('Performance', var)) %>%
  mutate( CovAdjust = "No",Index= "UW")

# Edu estimation (Anderson, no covariates adjustment)  
schoolsAQ<-read_dta("edu_QuasiControlA1.dta", encoding = 'latin1')
schoolsAQ<-as_tibble(schoolsAQ)

schoolsAQ$Se1stderr <-parenth(schoolsAQ$Se1stderr)
schoolsAQ$Le1stderr <-parenth(schoolsAQ$Le1stderr)

schoolNCAQ <- schoolsAQ %>% select_("var", "Se1coef", "Se1stderr", "Le1coef","Le1stderr") %>% 
  filter(grepl('index', var)) %>% filter(!grepl('Performance', var)) %>%
  mutate( CovAdjust = "No",Index= "W")

SchoolCombineQ<-rbind(schoolNCKQ, schoolNCAQ)
SchoolCombineQ <- transform(SchoolCombineQ, 
                           Se1coef = as.numeric(Se1coef), 
                           Se1stderr = as.numeric(Se1stderr),
                           Le1coef = as.numeric(Le1coef),
                           Le1stderr = as.numeric(Le1stderr))

EduShortQ<-ggplot(SchoolCombineQ) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = var, ymin = Se1coef - Se1stderr*interval1, ymax = Se1coef + Se1stderr*interval1),
                 lwd = 2, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = var, y = Se1coef, ymin = Se1coef - Se1stderr*interval2,
                      ymax = Se1coef + Se1stderr*interval2),size=1, lwd = 1/3, position = position_dodge(width = 1/2),
                  shape = 21, fill = "BLACK") + coord_flip() + theme_bw() + ggtitle("Edu (Year-1)") + 
  scale_x_discrete("", limits = rev(c("Monitoring index", "Effort index", "Input index"))) +
  scale_y_continuous(name="",  breaks = round(seq(min(-1.25), max(1.25), by = .25),2), limits = c(-1.25, 1.25)) +
  theme(text = element_text(size=15, face = "bold", vjust = 1.5), legend.position=c(.2,.9))

### Generate Figure 6 in Supplementary Information ###

EduLongQ<-ggplot(SchoolCombineQ) +
  geom_hline(yintercept = 0, colour = gray(1/2), lty = 2) +
  geom_linerange(aes(x = var, ymin = Le1coef - Le1stderr*interval1, ymax = Le1coef + Le1stderr*interval1),
                 lwd = 2, position = position_dodge(width = 1/2)) +
  geom_pointrange(aes(x = var, y = Le1coef, ymin = Le1coef - Le1stderr*interval2,
                      ymax = Le1coef + Le1stderr*interval2), size=1, lwd = 1/3, position = position_dodge(width = 1/2),
                  shape = 21, fill = "BLACK") + coord_flip() + theme_bw() + ggtitle("Edu (Year-2)") + 
  scale_x_discrete("", limits = rev(c("Monitoring index", "Effort index", "Input index"))) +
  scale_y_continuous(name="",  breaks = round(seq(min(-1.25), max(1.25), by = .25),2), limits = c(-1.25, 1.25)) +
  theme(text = element_text(size=15, face = "bold", vjust = 1.5), legend.position=c(.2,.9))

Figure6 <- grid.arrange(EduShortQ,EduLongQ, ncol=2, top=textGrob("Education (compared to quasi-control)", gp=gpar(fontsize=22,font=8, face = "bold")))


##############################
# multiple hypotheses
##############################

# Edu p-values
school1 <- schoolsK %>% select_("var", "Se1pval") %>% 
  filter(grepl('index', var)) %>% filter(!grepl('Performance', var)) %>%
  rename(pval=Se1pval) %>% mutate(year=1, sector="Edu")

school2 <- schoolsK %>% select_("var", "Le1pval") %>% 
  filter(grepl('index', var)) %>% filter(!grepl('Performance', var)) %>%
  rename(pval=Le1pval) %>% mutate(year=2, sector="Edu")

clinic1 <- ClinicsK %>% select_("var", "Se1pval") %>% 
  filter(grepl('index', var)) %>% rename(pval=Se1pval) %>% mutate(year=1, sector="Health")

clinic2 <- ClinicsK %>% select_("var", "Le1pval") %>% 
  filter(grepl('index', var)) %>% rename(pval=Le1pval) %>% mutate(year=2, sector="Health")

Waterf <- water1 %>% select_("var", "p", "CovAdjust") %>% 
  filter(CovAdjust=="No") %>% rename(pval=p) %>% mutate(year=2, sector="Water") %>% 
  select(-c(CovAdjust))

pdat <-rbind(school1, school2, clinic1, clinic2, Waterf)
pdat$pval <-as.numeric(pdat$pval)
p <-pdat$pval 
p <- sort(p, decreasing = FALSE)
 
# Choose alpha level
alpha <- 0.10

# Without any corrections
sig <- p < alpha

# Conduct three corrections and compare to target alpha
bonferroni_sig <- p.adjust(p, "bonferroni") < alpha
holm_sig <- p.adjust(p, "holm") < alpha
BH_sig <- p.adjust(p, "BH") <alpha
pvalBH<-p.adjust(p, "BH")

pdat <-pdat %>% arrange(desc(-pval))

pdat2<- cbind(pdat, pvalBH)
pdat3 <-pdat2[,c(1,4,3, 2,5)]

### Generate Table 16 in Supplementary Information ###
pdat3

# only education
pdatEdu <-rbind(school1, school2)
pdatEdu$pval <-as.numeric(pdatEdu$pval)
pedu <-pdatEdu$pval 
pedu <- sort(pedu, decreasing = FALSE)

(pvalBHEdu<-p.adjust(pedu, "BH"))

# power
powerAN = function(NSchools, NClusters, ICC, lambda=0.25, alpha=0.10) {
  NClusters = NClusters/2    
  f = 1 + (NSchools - 1) * ICC
  p = pnorm((lambda) * sqrt((NClusters*NSchools)/(2*f)) - qnorm(1-alpha/2))+ pnorm((-1)*(lambda) * sqrt((NClusters*NSchools)/(2*f)) - qnorm(1-alpha/2)) 
  return(p) 
}

## To report
powerAN(NSchools=90, NClusters=48, ICC=0.085, lambda=0.25, alpha=0.05)
powerAN(NSchools=90, NClusters=48, ICC=0.18, lambda=0.18, alpha=0.05)
powerAN(NSchools=90, NClusters=48, ICC=0.18, lambda=0.355, alpha=0.05)


## message data
messages<-read_dta("Village_Data_2016_Census_Analysis_Clean_withLegacyDataSetIDs.dta", encoding = 'latin1')

### Generate graphs of message determinants (ELF, Distance to Arua, Distance to HC, and Share secondary education)
### Graph not included in Supplementary Information

messmall <-messages %>% filter(TREATMENT_FINAL==1) %>%
  select(mess_per100, send_per100, HHI_Ethnicity, Arua_dist,HC_dist,adult_pop, secondary_lc1, literacy_lc1) %>%
  mutate(Arua_dist=Arua_dist/1000, HC_dist=HC_dist/1000)

F1<-ggplot(messmall, aes(x=HHI_Ethnicity, y=mess_per100)) +geom_point(size=3) +
  theme_bw() + geom_smooth(method = "loess", size = 1.5) +
  scale_x_continuous(name="ELF",  breaks = round(seq(min(0), max(.5), by = .1),1), limits = c(0, .5)) +
  scale_y_continuous(name="Messages per 100",  breaks = round(seq(min(0), max(50), by = 25),0), limits = c(0, 50))+
  theme(text = element_text(size=18, face = "bold", vjust = 1.5))

F2<-ggplot(messmall, aes(x=Arua_dist, y=mess_per100)) +geom_point(size=3) +
  theme_bw() + geom_smooth(method = "loess", size = 1.5) +
  scale_x_continuous(name="Distance to Arua",  breaks = round(seq(min(0), max(50), by = 10),0), limits = c(0, 50)) +
  scale_y_continuous(name="Messages per 100",  breaks = round(seq(min(0), max(50), by = 10),0), limits = c(0, 50)) +
  theme(text = element_text(size=18, face = "bold", vjust = 1.5))

F3<-ggplot(messmall, aes(x=HC_dist, y=mess_per100)) +geom_point(size=3) +
  theme_bw() + geom_smooth(method = "loess", size = 1.5) +
  scale_x_continuous(name="Distance to HC",  breaks = round(seq(min(0), max(6), by = 1),0), limits = c(0, 6)) +
  scale_y_continuous(name="Messages per 100",  breaks = round(seq(min(0), max(50), by = 10),0), limits = c(0, 50)) +
  theme(text = element_text(size=18, face = "bold", vjust = 1.5))

F4<-ggplot(messmall, aes(x=adult_pop, y=mess_per100)) +geom_point(size=3) +
  theme_bw() + geom_smooth(method = "loess", size = 1.5) +
  scale_x_continuous(name="Distance to HC",  breaks = round(seq(min(0), max(750), by = 100),0), limits = c(0, 750)) +
  scale_y_continuous(name="Messages per 100",  breaks = round(seq(min(0), max(50), by = 10),0), limits = c(0, 50)) +
  theme(text = element_text(size=18, face = "bold", vjust = 1.5))

F5<-ggplot(messmall, aes(x=secondary_lc1, y=mess_per100)) +geom_point(size=3) +
  theme_bw() + geom_smooth(method = "loess", size = 1.5) +
  scale_x_continuous(name="Share secondary edu",  breaks = round(seq(min(0), max(.75), by = .25),2), limits = c(0, .75)) +
  scale_y_continuous(name="Messages per 100",  breaks = round(seq(min(0), max(50), by = 10),0), limits = c(0, 50)) +
  theme(text = element_text(size=18, face = "bold", vjust = 1.5))
        
grid.arrange(F1,F2, F3, F5, ncol=2, top=textGrob("Message determinantes", gp=gpar(fontsize=30,font=8, face = "bold")))